# ----------------------------------------------------------------------
# ' combine daily fst files into one weekly fst file
# ' for 2020 only (dates 2020-01-01 through 2020-09-30)
# ----------------------------------------------------------------------

load_library = c('bit64','data.table','fst','future.apply','stringr','logger','vroom')
invisible(lapply(load_library, function(x) library(x, character.only=TRUE, quietly= TRUE)))

library(parallel)

# read arguments
# Four positional command-line arguments are expected:
#   1. outfile   - path of the small file that records the output location
#   2. data_type - data type code used in the file names ('r', 'diag' or 'm')
#   3. universe  - sub-directory and file-name prefix of inputs and outputs
#   4. week      - numeric week of 2020 to assemble
args=commandArgs(TRUE)
if (length(args) < 4) {
	stop('expected 4 arguments (outfile, data_type, universe, week), got ',
		length(args))
}
outfile = args[[1]]
data_type = args[[2]]
universe = args[[3]]
week = as.numeric(args[[4]])
# A non-numeric or fractional week would otherwise be pasted verbatim
# (e.g. 'NA') into the output file name instead of failing.
if (!is.finite(week) || week != floor(week)) {
	stop('week must be a whole number, got ', args[[4]])
}

# Input and output locations live under the same project bucket.
bucket = '/N/project/iuni_doctorshopping'
input_dir = file.path(bucket,'derived_v4_202101','daily_fst',universe)
target_dir = file.path(bucket,'derived_v4_202101','weekly_fst',universe)
# Output name: <universe>_<data_type>2020W<two-digit week>.fst
target_file = file.path(target_dir, paste0(universe,'_', data_type, 
	'2020W',stringr::str_pad(week, width = 2, pad='0'), '.fst'))

# Choose the column set and the date column for the requested data type.
# Unknown data types abort the script.
# NOTE(review): select_col and date_column are not referenced anywhere else
# in the visible part of this script -- confirm whether the reader below was
# meant to restrict columns to select_col.
if (data_type == 'r'){
	select_col = c('PATID','PAT_PLANID',"FILL_DT","PRESCRIPT_ID",
	"DEA","NPI","PRESCRIBER_PROV","PHARM","SPCLT_IND",
	"NDC","PRC_TYP","AHFSCLSS","FORM_IND","FORM_TYP","MAIL_IND",
	"FST_FILL","RFL_NBR","QUANTITY","DAYS_SUP","STRENGTH")
	date_column = 'FILL_DT'

} else if (data_type == 'diag'){
	select_col = c('PATID','PAT_PLANID',"FST_DT",
	"CLMID","DIAG","ICD_FLAG","LOC_CD","POA")
	date_column = 'FST_DT'

} else if (data_type == 'm') {
	select_col = c('PATID','PAT_PLANID','FST_DT','OP_VISIT_ID',
	'ADMIT_CHAN','ADMIT_TYPE','ICD_FLAG','LOC_CD','NDC','POS','PROC_CD','PROCMOD',
	'BILL_PROV','SERVICE_PROV','REFER_PROV','PROV','PROVCAT','TOS_CD','TOS_EXT','UNITS','ALT_UNITS',
	'CLMID','CLMSEQ','CONF_ID','DSTATUS')
	date_column = 'FST_DT'
} else {
	# Message fixed: it previously read 'now implemented yet', which inverted
	# the meaning.
	stop('not implemented yet for this data type ', data_type)
}

logger::log_info('now reading data files ...')

# Calendar days considered: 2020-01-01 through 2020-09-30.
all_date = seq(from=as.Date('2020-01-01'), to=as.Date('2020-09-30'),by='1 day')
# Strip the '2020 W' prefix from each yearweek label to keep the numeric week.
all_week = as.numeric(gsub('2020 W','',as.character(tsibble::yearweek(all_date))))

# which() drops NA comparisons, so an NA week selects nothing instead of
# producing a vector of NA dates.
pick_dates = all_date[which(all_week == week)]

# With no matching date, paste0() would recycle the empty vector to "" and
# build a single bogus file name; fail with a clear message instead.
if (length(pick_dates) == 0) {
	stop('no dates between ', format(min(all_date)), ' and ',
		format(max(all_date)), ' fall in week ', week)
}

# One daily file per selected date: <universe>_<data_type><YYYY-MM-DD>.fst
files_daily = file.path(input_dir,
	paste0(universe,'_',data_type,pick_dates,'.fst'))

logger::log_info('start to combine...')

# Report every missing input at once rather than failing on the first
# unreadable file part-way through the read loop.
missing_files = files_daily[!file.exists(files_daily)]
if (length(missing_files) > 0) {
	stop('missing daily input file(s): ', paste(missing_files, collapse = ', '))
}

# Read each daily file as a data.table and stack them row-wise.
# rbindlist() is called directly so the script does not depend on a pipe
# operator that is only available through another package's re-export.
dt = rbindlist(lapply(files_daily, function(ff){
	read_fst(ff, as.data.table = TRUE)
}))

# Make sure the output directory exists; this is a no-op when it already does.
dir.create(target_dir, recursive = TRUE, showWarnings = FALSE)

# Write the weekly file at maximum compression, then record its path in outfile.
write_fst(dt, target_file, compress = 100)
fwrite(data.table(outfile=target_file), outfile)



